import torch, time

# Benchmark: time one 5000x5000 matrix multiplication on the GPU and one on
# the CPU, printing the elapsed seconds for each.

# Large random matrices, allocated directly on the CUDA device.
a = torch.randn(5000, 5000, device="cuda")
b = torch.randn(5000, 5000, device="cuda")

# Warm-up: run the same multiplication once, untimed, so that one-time
# start-up work triggered by the first CUDA matmul is not counted as
# multiplication time in the measurement below.
_warmup = a @ b
# CUDA work is queued asynchronously; wait for the random fills and the
# warm-up to finish so the timer below starts with an idle GPU.
torch.cuda.synchronize()
del _warmup  # release the scratch result before the timed run

# GPU matrix multiplication
# perf_counter() is a monotonic, high-resolution clock intended for measuring
# short intervals; time.time() can jump if the system clock is adjusted.
start = time.perf_counter()
c = a @ b
torch.cuda.synchronize()  # wait for the GPU to finish before reading the clock
gpu_elapsed = time.perf_counter() - start
print("GPU time:", gpu_elapsed, "seconds")

# CPU matrix multiplication
# The device-to-host copies happen before the timer starts, so only the
# multiplication itself is measured.
a_cpu = a.cpu()
b_cpu = b.cpu()
start = time.perf_counter()
c_cpu = a_cpu @ b_cpu
cpu_elapsed = time.perf_counter() - start
print("CPU time:", cpu_elapsed, "seconds")

